"""
    This function will extract Glassdoor's work histories from workers' resumes.
"""

import pandas as pd

# Extract when birth year available
def extractResumeWorkExp(conn, startYear, endYear, gocConfidence):

    query = '''
                select
                    	main.FK_resumeId, adi.FK_userId,
                    	coalesce(jt.jobTitle, userEnteredJobTitle) as jobTitle,
                    	coalesce(emp.shortName, userEnteredEmployer) as employer,
                    	coalesce(loc.uniqueName, main.userEnteredLocation) as location,
                    	loc.metroId, loc.countryName,
                    	startDate, endDate,
                    	adi.gender, adi.birthYear, adi.highestEducation, adi.currentLocation,
                    	ind.sectorName, ind.industryName,
                    	emp.yearFounded,
                    main.GOC
                from (
                    	select
                    		FK_resumeId, FK_jobTitleId, userEnteredJobTitle,
                    		userEnteredEmployer, FK_employerId, userEnteredLocation,
                    		startDate, endDate,
                        case
                            when jt.gocConfidence > %f THEN goc.occupation END
                        as GOC
                    	from 
                        userProfile..resumeWorkExperience rwe with(nolock)
                    left join bizproj..jobTitle jt with (nolock)
                        on rwe.FK_jobTitleId = jt.PK_id
                    left join 
                        bizproj..glassdoorOccupationCategory goc with(nolock)
                        on goc.PK_id = jt.FK_gocId
                    	where startDate is not null or endDate is not null
                        and rwe.MD_createDateTime >= '2019-01-01'
                ) as main
                -- User Info
                left join (
                	select * from (
                		select
                			PK_id as resumeId, FK_userId, userEnteredLocation as currentLocation, MD_updateDateTime as resumeDate
                		from userProfile..resume with(nolock)
                		where FK_userId > 0 and FK_userId is not null
                	) as r
                	left join (
                		select PK_id as userId, gender, birthYear, highestEducation
                		from bizproj..GDUser with(nolock)
                	) as u on r.FK_userId = u.userId
                ) as adi on adi.resumeId = main.FK_resumeId
                -- Employer / Job Info
                left join bizproj..jobTitle jt with(nolock) on main.FK_jobTitleId = jt.PK_id
                left join bizproj..employer emp with(nolock) on main.FK_employerId = emp.PK_id
                left join reporting..a_employerSummary ae with(nolock) on main.FK_employerId = ae.FK_employerId
                left join bizproj..industry ind with(nolock) on ae.industryNK = ind.NK_industryId
                -- Location
                left join (
                	select shortName, uniqueName, metroId, co.name as countryName
                	from (
                		select shortName, uniqueName, metroId, FK_countryId
                		from (
                			select max(m.row) OVER (PARTITION BY shortName) as totalRow, *
                			from (
                				select
                					ROW_NUMBER() OVER (PARTITION BY shortName ORDER By uniqueName) as row, PK_id as metroId, *
                			 	from atlas..atlas_metro with(nolock)
                			) as m
                		) as mm
                		where mm.totalRow = 1
                	) as met
                	left join atlas..atlas_country co with(nolock) on co.PK_id = met.FK_countryId
                ) as loc on loc.shortName = main.userEnteredLocation
                where (adi.birthYear >= %f and adi.birthYear < %f)
                    -- and adi.gender in ('FEMALE', 'MALE')
            ''' % (gocConfidence, startYear, endYear)

    return pd.read_sql(query,conn)


# Extract when no birth year available
def extractResumeWorkExpNoBirthYear(conn, gocConfidence):

    query = '''
                select
                    	main.FK_resumeId, adi.FK_userId,
                    	coalesce(jt.jobTitle, userEnteredJobTitle) as jobTitle,
                    	coalesce(emp.shortName, userEnteredEmployer) as employer,
                    	coalesce(loc.uniqueName, main.userEnteredLocation) as location,
                    	loc.metroId, loc.countryName,
                    	startDate, endDate,
                    	adi.gender, adi.birthYear, adi.highestEducation, adi.currentLocation, 
                    	ind.sectorName, ind.industryName,
                    	emp.yearFounded,
                    main.GOC
                from (
                    	select
                    		FK_resumeId, FK_jobTitleId, userEnteredJobTitle,
                    		userEnteredEmployer, FK_employerId, userEnteredLocation,
                    		startDate, endDate,
                        case
                            when jt.gocConfidence > %f THEN goc.occupation END
                        as GOC
                    	from 
                        userProfile..resumeWorkExperience rwe with(nolock)
                    left join bizproj..jobTitle jt with (nolock)
                        on rwe.FK_jobTitleId = jt.PK_id
                    left join 
                        bizproj..glassdoorOccupationCategory goc with(nolock)
                        on goc.PK_id = jt.FK_gocId
                    	where startDate is not null or endDate is not null
                        and rwe.MD_createDateTime >= '2019-01-01'
                ) as main
                -- User Info
                left join (
                	select * from (
                		select
                			PK_id as resumeId, FK_userId, userEnteredLocation as currentLocation, MD_updateDateTime as resumeDate
                		from userProfile..resume with(nolock)
                		where FK_userId > 0 and FK_userId is not null
                	) as r
                	left join (
                		select PK_id as userId, gender, birthYear, highestEducation
                		from bizproj..GDUser with(nolock)
                	) as u on r.FK_userId = u.userId
                ) as adi on adi.resumeId = main.FK_resumeId
                -- Employer / Job Info
                left join bizproj..jobTitle jt with(nolock) on main.FK_jobTitleId = jt.PK_id
                left join bizproj..employer emp with(nolock) on main.FK_employerId = emp.PK_id
                left join reporting..a_employerSummary ae with(nolock) on main.FK_employerId = ae.FK_employerId
                left join bizproj..industry ind with(nolock) on ae.industryNK = ind.NK_industryId
                -- Location
                left join (
                	select shortName, uniqueName, metroId, co.name as countryName
                	from (
                		select shortName, uniqueName, metroId, FK_countryId
                		from (
                			select max(m.row) OVER (PARTITION BY shortName) as totalRow, *
                			from (
                				select
                					ROW_NUMBER() OVER (PARTITION BY shortName ORDER By uniqueName) as row, PK_id as metroId, *
                			 	from atlas..atlas_metro with(nolock)
                			) as m
                		) as mm
                		where mm.totalRow = 1
                	) as met
                	left join atlas..atlas_country co with(nolock) on co.PK_id = met.FK_countryId
                ) as loc on loc.shortName = main.userEnteredLocation
                where adi.birthYear is null
                    -- and adi.gender in ('FEMALE', 'MALE')
            ''' % (gocConfidence)

    return pd.read_sql(query,conn)
